import struct
from glob    import iglob
from os      import SEEK_SET
from cPickle import UnpicklingError
from .field     import Field
from .fieldlist import FieldList
from .functions import flat, unpickle
from .netcdf.read import read as netcdf_read
from .pp.read     import read as pp_read
from .aggregate import aggregate as cf_aggregate

def read(files, verbose=False, index=None, ignore_ioerror=False, aggregate=True,
         umversion=None, squeeze=1, only_lama=False, not_lama=False,
         prop=None, attr=None, coord=None, cellsize=None):
    '''

Read fields from files from disk or from an OPeNDAP server.

Any amount of any combination of netCDF, PP and binary pickle format
files may be input for reading.

**NetCDF files**

    * Each field contains its file name in the `file` attribute.
    
    * The netCDF variable names of the field and its components are
      stored in the `ncvar` attributes.
    
    * Note that fields may be selected by netCDF variable name by
      setting a value (or values) of 'ncvar' via the `attr` parameter.
    
    * Fields referenced within CF-netCDF formula_terms or ancillary
      variables are not included in the returned list of fields.

**PP files**

    * Each field contains its file name in the `file` attribute.
    
    * It may be necessary to specify the UM version. See the
      `umversion` parameter for details.
    
    * If any PP files are read, the aggregation option 'strict_units'
      defaults to False for all input files.
    
**Binary pickle files**
    
    * Fields are reconstituted (unpickled) from each binary pickle
      file, such as one created by `cf.pickle`.
    
    * Note that ``cf.read('file.cfpkl')`` is equivalent to
      ``cf.unpickle('file.cfpkl')``.

**Files on an OPeNDAP server**

    * All files on OPeNDAP servers are assumed to be netCDF files.

:Parameters:

    files : (arbitrarily nested sequence of) str
        A string or arbitrarily nested sequence of strings giving the
        file names or OPeNDAP URLs from which to read fields. When on
        disk, the file names may contain UNIX file name metacharacters
        as understood by the python `glob` module.
    
    index : int, optional
        Only return the field with this non-negative index in the
        otherwise returned full list of fields. By default return all
        (otherwise selected) fields from the input files.
    
    verbose : bool, optional
        If True then print information to stdout.
    
    umversion : int or float, optional
        The UM version to be used when not encoded in the PP header
        (as is the case for PP files created by the UM
        pre-vn5.3). Must be a number, such as 405 (for version 4.5) or
        606.2 (for version 6.6.2).

        In general, if the PP header encodes its own UM version then
        this parameter is ignored and otherwise an exception will be
        raised if the UM version is required. The only departure from
        this rule occurs when `umversion` defines a tertiary element
        (such as the 3 in 606.3), in which case if, and only if, the
        UM version encoded in the PP header matches the primary and
        secondary elements (such as the 606 in 606.3) then `umversion`
        will be used in preference.
      
        Ignored for any non-PP input files.

    ignore_ioerror : bool, optional
        If True then ignore any file which raises an IOError whilst
        being read, as would be the case for an empty file, unknown
        file format, etc. By default the IOError is raised.
    
    aggregate : bool or dict, optional
        If True or a dictionary then aggregate the fields read in from
        all input files into as few fields as possible using the CF
        aggregation rules. If a dictionary then it is passed as
        keyword arguments to the `aggregate` function.
    
    squeeze : int, optional
        If 1 (the default) then remove all size one dimensions from
        each field's data array. If -1 then include all size one
        domain dimensions in each field's data array. If 0 then leave
        the data array dimensions unchanged.

    prop : dict, optional
        Only read fields matching the given conditions on their CF
        properties. Refer to the field's `match` method for details.

    attr : dict, optional
        Only read fields matching the given conditions on their
        attributes. Refer to the field's `match` method for details.

    coord : dict, optional
        Only read fields matching the given conditions on their
        coordinates. Refer to the field's `match` method for details.

    cellsize : dict, optional
        Only read fields matching the given conditions on their
        coordinates' cellsizes. Refer to the field's `match` method
        for details.

:Returns:
    
    out : FieldList
        A list of fields.

:Raises:

    IOError :
        Raised if `ignore_ioerror` is False and there was an I/O
        related failure, including unknown file format.


**Examples**

>>> f = cf.read('file*.nc')
>>> type(f)
<class 'cf.field.FieldList'>
>>> f
[<CF Field: pmsl(30, 24)>,
 <CF Field: z-squared(17, 30, 24)>,
 <CF Field: temperature(17, 30, 24)>,
 <CF Field: temperature_wind(17, 29, 24)>]

>>> cf.read('file*.nc')[0:2]
[<CF Field: pmsl(30, 24)>,
 <CF Field: z-squared(17, 30, 24)>]

>>> cf.read('file*.nc', index=0)
[<CF Field: pmsl(30, 24)>]

>>> cf.read('file*.nc')[-1]
<CF Field: temperature_wind(17, 29, 24)>

>>> cf.read('file*.nc', prop={'units': 'K'})
[<CF Field: temperature(17, 30, 24)>,
 <CF Field: temperature_wind(17, 29, 24)>]

>>> cf.read('file*.nc', attr={'ncvar': 'ta'})
[<CF Field: temperature(17, 30, 24)>]

>>> cf.read('file*.nc', prop={'standard_name': '.*pmsl*', 'units':'K|Pa'})[0]
<CF Field: pmsl(30, 24)>

>>> cf.read('file*.nc', prop={'units': ['K', 'Pa']})
[<CF Field: pmsl(30, 24)>,
 <CF Field: temperature(17, 30, 24)>,
 <CF Field: temperature_wind(17, 29, 24)>]

'''
    # Normalize the selection dictionaries. None (rather than {}) is
    # used as the default to avoid the shared mutable default argument
    # trap; behaviour is unchanged for all callers.
    if prop is None:
        prop = {}
    if attr is None:
        attr = {}
    if coord is None:
        coord = {}
    if cellsize is None:
        cellsize = {}

    # Initialize the output list of fields
    field_list = FieldList()

    if isinstance(aggregate, dict):
        # A dictionary means "do aggregate, with these options"
        aggregate_options = aggregate.copy()
        aggregate         = True
    else:
        aggregate_options = {}

    # Count the number of fields (in all files) and the number of
    # files
    field_counter = 0
    file_counter  = 0

    for file_glob in flat(files):

        if file_glob.startswith('http://'):
            # Do not glob a URL
            files2 = (file_glob,)
        else:
            # Glob files on disk
            files2 = iglob(file_glob)

        for filename in files2:

            # Print some informative messages
            if verbose and index is None:
                print('File: %s' % filename)

            # --------------------------------------------------------
            # Read the file into fields. Note that _read_a_file may
            # add default options to aggregate_options (e.g. for PP
            # files) which are picked up by the aggregation below.
            # --------------------------------------------------------
            fields = _read_a_file(filename,
                                  ignore_ioerror=ignore_ioerror,
                                  verbose=verbose,
                                  aggregate_options=aggregate_options,
                                  umversion=umversion,
                                  only_lama=only_lama,
                                  not_lama=not_lama)
            
            # ----------------------------------------------------------------
            # Select matching fields
            # ----------------------------------------------------------------
            fields = fields.subset(prop=prop, attr=attr,
                                   coord=coord, cellsize=cellsize)

            # --------------------------------------------------------
            # Add this file's fields to those already read from other
            # files
            # --------------------------------------------------------
            field_list.extend(fields)
   
            field_counter = len(field_list)
            file_counter += 1

            # Print some informative messages
            if verbose and index is None:
                i = field_counter - len(fields)
                for f in fields:
                    print('%d: %s' % (i, repr(f)))
                    i += 1
            #--- End: if

            # --------------------------------------------------------
            # If we only want one field from all input files then
            # break now if we have got it. index+1 fields are needed
            # for field_list[index] to exist.
            # --------------------------------------------------------
            if index is not None and field_counter >= index+1:
                break
        #--- End: for
            
        # ------------------------------------------------------------
        # If we only want one field from all input files then break
        # now if we have got it (same condition as the inner loop, so
        # that we never stop one field short of field_list[index])
        # ------------------------------------------------------------
        if index is not None and field_counter >= index+1:
            break
    #--- End: for     

    # Error check
    if not ignore_ioerror:
        if not file_counter:
            raise RuntimeError('No files found')
        if not field_list:
            raise RuntimeError('No fields found from '+str(file_counter)+' files')
    #--- End: if

    if index is not None and index >= len(field_list):
        raise IndexError('FieldList index='+str(index)+' is out of range')

    # Print some informative messages
    if verbose:
        if index is None:  
            print("Read %d field%s from %d file%s" % 
                  (field_counter, ('s' if field_counter!=1 else ''),
                   file_counter , ('s' if file_counter !=1 else '')))
        else:
            # At least one file has been read at this point (the
            # IndexError check above guarantees field_list[index]
            # exists), so 'filename' is bound to the last file read.
            print('File: %s' % filename)
            print('%d: %s' % (0, repr(field_list[index])))
            print('Read 1 field from 1 file')
    #--- End: if
    
    # ----------------------------------------------------------------
    # Select the single requested field. field_list[None] raises
    # TypeError, so when no index was given the full list is kept.
    # ----------------------------------------------------------------
    try:
        field_list = field_list[index]
    except TypeError:
        pass

    # ----------------------------------------------------------------
    # Aggregate the output fields
    # ----------------------------------------------------------------    
    if len(field_list) > 1 and aggregate:
        if verbose:
            print('Aggregating fields ...')
            org_len = len(field_list)
            
        field_list = cf_aggregate(field_list, **aggregate_options)
        
        if verbose:
            nfields = len(field_list)
            print('%d input field%s aggregated into %d field%s' %
                  (org_len, ('' if org_len==1 else 's'), 
                   nfields, ('' if nfields==1 else 's')))
    #--- End: if

    # ----------------------------------------------------------------
    # Squeeze size one dimensions from the data arrays. Do one of:
    # 
    # 1) Squeeze the fields, i.e. remove all size one dimensions from
    #    all field data arrays
    #
    # 2) Unsqueeze the fields, i.e. Include all size 1 space
    #    dimensions in the data array.
    #
    # 3) Nothing
    # ----------------------------------------------------------------
    if squeeze == 1:
        field_list.squeeze()
    elif squeeze == 0:
        pass
    elif squeeze == -1:
        field_list.unsqueeze()
    else:
        raise ValueError("Incorrect setting of squeeze: %s" % squeeze)

    return field_list
#--- End: def

def _read_a_file(filename,
                 aggregate_options=None,
                 ignore_ioerror=False,
                 verbose=False,
                 umversion=None,
                 only_lama=False,
                 not_lama=False):
    '''

Read the contents of a single file into a field list.

:Parameters:

    filename : str
        The file name or an OPeNDAP URL ('http://...' names are
        assumed to be netCDF files on an OPeNDAP server).

    aggregate_options : dict, optional
        The keys and values of this dictionary may be passed as
        keyword parameters to an external call of the aggregate
        function. Note that this dictionary may be updated in-place
        (for example, reading a PP file sets a default of
        ``strict_units=False``) so that the caller sees the new
        defaults.

    ignore_ioerror : bool, optional
        If True then return an empty field list if reading the file
        produces an IOError, as would be the case for an empty file,
        unknown file format, etc. By default the IOError is raised.
    
    verbose : bool, optional
        If True then print information to stdout.
    
    umversion : int or float, optional
        The UM version to use when reading PP files. See `read` for
        details. Ignored for non-PP files.

:Returns:

    out : FieldList
        The fields in the file.

:Raises:

    IOError :
        If `ignore_ioerror` is False and

        * The file is a pickle file which contains an object that is
          not a Field or a FieldList.

        * The file can not be opened or has an unknown format.

'''
    # Use a fresh dictionary when no aggregation options were given.
    # (A mutable {} default would be shared, and mutated, across
    # calls: see the strict_units update below.)
    if aggregate_options is None:
        aggregate_options = {}

    # Find this input file's format
    if filename.startswith('http://'):
        file_format = 'netCDF'
        openfile = filename
    else:

        # ------------------------------------------------------------
        # Try to unpickle the file
        # ------------------------------------------------------------
        try:
            fields = unpickle(filename)
        except UnpicklingError:
            # Couldn't unpickle the file, so carry on and try to read
            # it as a file of known format.
            pass
        except IOError as io_error:
            if ignore_ioerror:
                if verbose:
                    print('WARNING: Ignoring IOError: %s' % io_error)
                return FieldList()
            raise io_error
        else:
            # Successfully unpickled the file. Make sure that we have
            # a FieldList and return.            
            if isinstance(fields, Field):
                return FieldList([fields])
            elif isinstance(fields, FieldList):
                return fields
            else:
                io_error = ("Pickle file '%s' contains '%s' object" %
                            (filename, fields.__class__.__name__))
                if ignore_ioerror:
                    if verbose:
                        print('WARNING: Ignoring IOError: %s' % io_error)
                    return FieldList()
                raise IOError(io_error)
        #--- End: try

        # ------------------------------------------------------------
        # Still here? Then open the file and find its format
        # ------------------------------------------------------------
        try:
            openfile = open(filename, 'rb')             
        except IOError as io_error:
            if ignore_ioerror:
                if verbose:
                    print('WARNING: Ignoring IOError: %s' % io_error)
                return FieldList()
            raise io_error
        #--- End: try

        try:
            file_format = _file_format(openfile)        
        except IOError as io_error:
            if ignore_ioerror: 
                if verbose:
                    print('WARNING: Ignoring IOError: %s' % io_error)
                return FieldList()
            raise io_error
        #--- End: try
    #--- End: if

    # ----------------------------------------------------------------
    # Still here? Read the file into fields.
    # ----------------------------------------------------------------
    if file_format == 'netCDF':
        fields = netcdf_read(openfile, only_lama=only_lama, not_lama=not_lama)
        
    elif file_format == 'PP':        
        fields = pp_read(openfile, umversion=umversion)

        # For PP fields, the default is strict_units=False. This
        # deliberately updates the caller's dictionary in-place.
        if 'strict_units' not in aggregate_options:
            aggregate_options['strict_units'] = False
    
    # Developers: Add more file formats here ...

    # ----------------------------------------------------------------
    # Return the fields
    # ----------------------------------------------------------------
    return fields
#--- End: def

def _file_format(openfile):
    '''

Read and interpret a file's magic number.

Takes a file object as input and assumes that we're currently pointing
to the beginning of the file. The file is rewound to the beginning
after reading the magic number.

Developers: For each new file format that is supported, another 'if'
clause needs to be added to this function.

:Parameters:

    openfile : file
        A python file object.

:Returns:

    out : str
        The format of the file.

:Raises:
 
    IOError :
        If the file has an unsupported format.

**Examples**

>>> try:
...     format = _file_format(openfile)        
... except IOError:
...     # Do something
... else:
...     # Do something else

''' 
    # Read the magic number    
    try:
        magic_number = struct.unpack('=L', openfile.read(4))[0]
    except struct.error:
        raise IOError("File %s is empty (contains fewer than 4 words)" %
                      openfile.name)

    # Reset the pointer to the beginning of the file
    openfile.seek(0, SEEK_SET)

    # ----------------------------------------------------------------
    # netCDF
    # ----------------------------------------------------------------
    if magic_number in (21382211, 1128547841, 1178880137, 38159427):
        return 'netCDF'

    # ----------------------------------------------------------------
    # PP
    # ----------------------------------------------------------------
    if magic_number in (256, 65536):
        return 'PP'

    # ----------------------------------------------------------------
    # Developers: Add more file formats here ...
    # ----------------------------------------------------------------

    # Still here?
    raise IOError("File %s has unsupported format: Magic number=%d" % 
                  (openfile.name, magic_number))
#--- End: def
